## 19 March 2013
## Danny Hill
## filename: LocCarChapter Forecasts
setwd("~/Documents/terror/LocCarChapter/new.forecast.stuff")

library(coda)
library(foreign)

chain1 <- read.coda("tf.out", "tf.ind")
dim(chain1)
#summary(chain1)

alphas<-chain1[,1:159]
alphas<-as.matrix(alphas)
dim(alphas)

betas<-chain1[,161:171]
betas<-as.matrix(betas)
dim(betas)
summary(betas)

all.dat<-read.dta("lagged.terror.dta")
ids1<-unique(all.dat$id)

oos.dat<-all.dat[all.dat$year==1997,]
dim(oos.dat)
head(oos.dat)
ids2<-unique(oos.dat$id)

ids.diff<-setdiff(ids1, ids2)
alphas.1997<-subset(alphas, select = -c(ids.diff))
dim(alphas.1997)

xmat<-cbind(oos.dat$lnvdiss, oos.dat$lcoerce, oos.dat$lvdiss, oos.dat$lmid, oos.dat$lcivsoc, oos.dat$lcontest, oos.dat$lgovtbv, oos.dat$lpart, oos.dat$lnvrep, oos.dat$lveto, oos.dat$lmacro)
colnames(xmat)<-c("nvdiss", "coerce", "vdiss", "mid", "civsoc", "contest", "govtbv", "part", "nvrep", "veto", "macro")
dim(xmat)
head(xmat)

xbeta<-xmat%*%t(betas)
dim(xbeta)

##add country-specific intercepts to linear predictor and transform to count, DO NOT ROUND
yhat<-exp(t(alphas.1997)+xbeta)
dim(yhat)

##convert predicted count to predicted Pr(Y>0|X)

phat<-1-exp(-yhat)

## generate predictions/CIs
pred.count<-t(apply(na.omit(yhat), 1, quantile,c(.5, .025, .975)))
round.count<-round(pred.count)

pred.prob<-t(apply(na.omit(phat), 1, quantile,c(.5, .025, .975)))

pred.count[1:10,]
pred.prob[1:10,]

## grab the observed data
junk<-cbind(oos.dat$allterror, oos.dat$id, oos.dat$ccode)

## create binary terror variable 
bin.terr<-as.numeric(junk[,1]>0)

## create binary predictions
bin.pred<-as.numeric(pred.prob[,1]>.5)

## create matrix with observations and predictions
obs.pred<-cbind(junk[,2], junk[,3], junk[,1], pred.count, round.count, bin.terr, pred.prob, bin.pred) 
dim(obs.pred)
summary(obs.pred)

## Precisely correct
summary(as.numeric(obs.pred[,7] == obs.pred[,3]))

## Observed within CI
summary(as.numeric(obs.pred[,3] <= obs.pred[,9] & obs.pred[,3] >= obs.pred[,8]))

obs.pred<-cbind(obs.pred, precise, in.CI)

## Performance measures using a cut-point of 0.5
##Sensitivity is the proportion of positives correctly classified, i.e. the bottom right cell divided by the sum
##of the bottom row (.86). Specificity is the proportion of negatives correctly classified, i.e. the upper left ##cell divided by the sum of the top row (.60). Accuracy is the proportion correctly classified, i.e. the sum of ##the upper left and bottom right cells divided by the sum of all cells (.78). Positive predictive value ##(sometimes called "precision") is the proportion of predicted positives that are actually positive, i.e. the ##bottom right cell divided by the sum of the right column (.85). Negative predictive value is the proportion of
## predicted zeros that are actually zeros, i.e. the top left cell divided by the sum of the left column (.61)

pred.table<-table(bin.terr,bin.pred)

tn<-pred.table[1,1]
fp<-pred.table[1,2]
fn<-pred.table[2,1]
tp<-pred.table[2,2]

print(sens<-tp/(fn+tp))
print(spec<-tn/(tn+fp))
print(acc<-(tp+tn)/(tp+tn+fp+fn))
print(ppv<-tp/(tp+fp))
print(npv<-tn/(tn+fn))

##ROC curve, and area under the ROC curve (AUC)
library(ROCR)

preds<-prediction(obs.pred[,8], obs.pred[,7])
auc<-performance(preds, "auc")
auc

roc<-performance(preds,"tpr","fpr")

pdf("terror_roc.pdf")
plot(roc, lwd=2, ylab="Pr(Correct | Obs > 0)", xlab="Pr(Incorrect | Obs = 0)")
abline(0, 1, lwd=2, lty=2)
legend("bottomright", c("Model (AUC = 0.80)", "Random Prediction"), lty=c(1, 2), lwd=c(2, 2), cex=.75)
dev.off()

##Brier score is the mean
summary((obs.pred[,8]-obs.pred[,7])^2)

##Separation plot
library(separationplot)
separationplot(obs.pred[,8], obs.pred[,7], lwd1=5, lwd2=2)

##Count plots 

## Create a matrix to store the probabilities
count.probs<-matrix(nrow=nrow(obs.pred), ncol=max(obs.pred[,3])+1)

## Grab predicted count from observed/predicted matrix
yhat2<-obs.pred[,4]

## Loop through each observed count value, generating probabilities for each observation

for (i in 0:(max(obs.pred[,3]))) {
	count.probs[,i+1] <- dpois(i, yhat2)
	}

## Create mean probability for each count. 
mean.count.probs<-apply(count.probs, 2, mean)
mean.count.probs
pred.freqs<-mean.count.probs*nrow(obs.pred)

## Predictive distribution, binning for counts > 5
## Uses PMF values for single values, CDF from upper-lower count for bins, can simply sum them since it's a PMF
pred.dist<-matrix(nrow=13, ncol=1)
pred.dist[1:6,]<-pred.freqs[1:6]
pred.dist[7,]<-sum(pred.freqs[7:16])
pred.dist[8,]<-sum(pred.freqs[17:26])
pred.dist[9,]<-sum(pred.freqs[27:51])
pred.dist[10,]<-sum(pred.freqs[52:101])
pred.dist[11,]<-sum(pred.freqs[102:201])
pred.dist[12,]<-sum(pred.freqs[202:301])
pred.dist[13,]<-sum(pred.freqs[302:512])

terr.freq<-as.matrix(table(obs.pred[,3]))
obs.freqs<-cbind(seq(1:length(terr.freq)), terr.freq)
obs.freqs

obs.dist<-matrix(nrow=13, ncol=1)
obs.dist[1:6,]<-obs.freqs[1:6,2]
obs.dist[7,]<-sum(obs.freqs[7:15,2])
obs.dist[8,]<-sum(obs.freqs[16:23,2])
obs.dist[9,]<-sum(obs.freqs[24:33,2])
obs.dist[10,]<-sum(obs.freqs[34:36,2])
obs.dist[11,]<-sum(obs.freqs[37:38,2])
obs.dist[12,]<-0
obs.dist[13,]<-sum(obs.freqs[39:40,2])

x<-seq(0,12)
bob<-cbind(x, pred.dist, obs.dist)

pdf("count_predictions.pdf")
plot(bob[,1], bob[,2], type="o", pch=2, lty=2, col="black", xlim=c(0, 12), ylim=c(0, 50), ylab="Frequency", xlab="Number of Attacks", cex.lab=.8, axes=F)
points(bob[,1], bob[,3], type="o", pch=0, lty=1, col="black", xlim=c(0, 12), ylim=c(0, 50))
axis(1, at=c(seq(0, 12)), labels=c("0", "1", "2", "3", "4", "5", "(5,15]", "(15,25]", "(25,50]", "(50,100]", "(100,200]", "(200,300]", "(300,511]"),  cex.axis=.52)
axis(2, at=c(seq(0, 50, by=10)), cex.axis=.52)
box()
legend("topright", c("Predicted", "Observed"), cex=.75, pch=c(2, 0), lty=c(2, 1), col=c("black", "black"))
dev.off()